The purpose of this notebook is to analyze item-level data collected for study SGC-3: The Insight Hypothesis
#IMPORT DATA from fall and spring files
fall_items <- "data/fall_sgc3a_blocks.csv"
spring_items <- "data/spring_sgc3a_blocks.csv"
df_fall <- read.csv(fall_items)
df_spring <- read.csv(spring_items)
#Create combined data frame
df_items <- rbind(df_fall, df_spring)
#Create extra fields
df_items$time_sec <- df_items$rt / 1000 #item time in seconds
#Create answer-consistency column (values: Tri, Ortho, Both, Neither, BLANK)
#case_when() evaluates conditions in order, so the BLANK test comes first --
#the old replace() chain achieved the same precedence by applying BLANK last
df_items <- df_items %>% mutate(consistency = case_when(
  answer == "" ~ "BLANK",                       #no response given
  correct == 1 & orth_correct == 1 ~ "Both",    #correct under both readings
  correct == 0 & orth_correct == 1 ~ "Ortho",   #orthogonal reading only
  correct == 1 & orth_correct == 0 ~ "Tri",     #triangular reading only
  correct == 0 & orth_correct == 0 ~ "Neither", #neither reading
  TRUE ~ "0"                                    #fallback (mirrors the old dummy value)
))
#Create TOTAL column for future sorting (triangular-correct count per subject, excluding q16)
df_totals <- df_items %>% filter(q != 16) %>% group_by(subject) %>% summarise(TOTAL = sum(correct))
df_items <- left_join(df_items, df_totals, by = "subject")
# #Code incorrect responses (not triangular or otho correct)
# df_items$response <- df_items$orth_correct + df_items$correct #items not correct in tri or orth interpretation
# df_items$response <- dplyr::recode( df_items$response, `1`= "right", `2`="both", `0`="wrong" )
# df_items$incorrect <- dplyr::recode( df_items$response, "right"=0, "both"=0, "wrong"=1 )
#Create factors
df_items <- df_items %>% mutate(
  subject = as.factor(subject),
  session = as.factor(session),
  term = as.factor(term),
  condition = as.factor(condition),
  consistency = as.factor(consistency),
  explicit = as.factor(explicit),
  impasse = as.factor(impasse),
  axis = as.factor(axis),
  q = as.factor(q),
  question = as.factor(question),
  error = 0 #temporary holder for error codes (coerced to character once codes are assigned)
)
#Change values of column names for later reshaping
# df_items <- rename(df_items, rs_incorrect = incorrect)
df_items <- rename(df_items, rs_tri = correct)
df_items <- rename(df_items, rs_ortho = orth_correct)
#Separate free response from (main) multiple choice blocks
df_freeResponse <- df_items %>% filter(q == 16)
df_items <- df_items %>% filter(q != 16)
#Cleanup temporary dataframes
rm(df_fall, df_spring, df_totals)
#Set custom colors
lgrey <- "#cacec8"
lgreen <- "#c6edbb"
lyellow <- "#FBEEBB"
lblue <- "#BCD9EE"
I have the intuition that (for an individual participant) accurate vs. inaccurate responses are not randomly distributed across the timecourse of the study. There should be a substantial learning effect, such that responses are incorrect until the individual LEARNS how the coordinate system works — and then responses will be correct.
#FILTER question-response data
#q6 and q9 are excluded because tri == ortho there; q16 is the free-response item
l_timecourse <- df_items %>%
  filter(!q %in% c(6, 9, 16)) %>%
  select(subject, impasse, q, consistency, TOTAL)
#ORDER dataframe by subject total amount (lowest scorers first in the raster)
#fct_reorder now references the columns directly instead of l_timecourse$... inside mutate()
l_timecourse <- l_timecourse %>% mutate(subject = fct_reorder(subject, TOTAL, min))
#WIDE dataframe (for manual inspection); pivot_wider() supersedes spread()
w_timecourse <- l_timecourse %>% pivot_wider(names_from = q, values_from = consistency, names_sort = TRUE)
#VISUALIZE response type per question for each subject, one panel per condition
#(`palette` is not an aesthetic, so it was removed from aes() -- it had no effect there)
p1 <- ggplot(l_timecourse %>% filter(impasse == 1),
             aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse) +
  geom_raster() +
  scale_fill_manual(values = c("#cacec8", "#FBEEBB", "#BCD9EE", "#c6edbb")) +
  labs(x = "Question Sequence", fill = "Response Type")
p2 <- ggplot(l_timecourse %>% filter(impasse == 2),
             aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse) +
  geom_raster() +
  scale_fill_manual(values = c("#cacec8", "#FBEEBB", "#BCD9EE", "#c6edbb")) +
  labs(x = "Question Sequence", fill = "Response Type")
#STACK the two condition panels and add a shared title
figure <- ggarrange(p1, p2, ncol = 1)
annotate_figure(figure,
top = text_grob("Timecourse of Response Type by Participant and Condition", color = "black", face = "bold", size = 14))
TODO: This plot of Response Type by Question (in sequence) for each participant demonstrates that … .
#SUMMARIZE items response-type by question (which relation is being asked)
#counts are derived directly from `consistency`, so the temporary dummy counter
#column (and the redundant select() inside the pipeline) is no longer needed
df_questions <- df_items %>%
  filter(!q %in% c(6, 9)) %>%
  group_by(q, condition) %>%
  summarize(n = n(),
            question = question[1], #label is constant within each q
            rs_tri = sum(rs_tri),
            rs_ortho = sum(rs_ortho),
            rs_blank = sum(consistency == "BLANK"),
            rs_neither = sum(consistency == "Neither"))
#convert to LONG dataframe
l_questions <- df_questions %>%
  pivot_longer(cols = starts_with("rs_"), names_to = "answer_type", values_to = "count")
#update factor level order in order to maximize comparison in plot
l_questions$answer_type <- relevel(factor(l_questions$answer_type), "rs_neither")
#TOTAL ACROSS CONDITIONS
ggplot(data = l_questions, aes(x = q, y = count, fill = answer_type)) +
  geom_bar(position = "stack", stat = "identity") +
  facet_grid(~condition) +
  scale_fill_manual(values = c(lyellow, lgrey, lblue, lgreen)) +
  labs(title = "Answer Type by Question and Condition", x = "Question Sequence", y = "Count(n)", fill = "Response Type")
TODO: This plot of Response Type by Question (in sequence) and Condition demonstrates that… .
TODO: Test this with a linear mixed effects model. .
What are the alternative answers subjects give for the first question?
#Create a contingency table with marginal sums for all answers across both conditions on Q1
df_q1 <- df_items %>% filter(q == 1)
#named q1_tab rather than `t`, which masked base::t() (matrix transpose)
q1_tab <- table(df_q1$answer, df_q1$condition)
q1_tab <- q1_tab[order(-rowSums(q1_tab)), ] #reorder by marginal frequency
addmargins(q1_tab)
##
## 111 121 Sum
## A 50 1 51
## F 10 19 29
## 0 18 18
## O 0 14 14
## CF 1 6 7
## AI 0 3 3
## AF 1 0 1
## AO 0 1 1
## C 0 1 1
## I 0 1 1
## Sum 62 64 126
#DISPLAY the figure for Q1 answer "A" (include_graphics embeds the image file)
img_path <- "images/q1_A.png"
include_graphics(img_path)
(control: 50, impasse: 1) The most common erroneous response is the “A”. In the control condition, this is the orthogonal distractor. However, in the impasse condition, the ‘A’ is not orthogonal to the directed start time, but the closest nearly orthogonal response. Potentially revealing a satisficing strategy? Or visual error in seeing it as an orthogonal intersect?
#A: plot the full timecourse for every subject who answered "A" on Q1
subjects <- df_items %>%
  filter(q == 1, answer == "A") %>%
  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject),
       aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse) +
  geom_raster() +
  scale_fill_manual(values = c(lgrey, lyellow, lblue, lgreen))
#CODE ERROR TYPES from manual inference
is_a <- df_items$q == 1 & df_items$answer == "A"
df_items$error[is_a & df_items$condition == 111] <- "orthogonal"
df_items$error[is_a & df_items$condition == 121] <- "visualerror-right-30"
#DISPLAY the figure for Q1 answer "F" (include_graphics embeds the image file)
img_path <- "images/q1_F.png"
include_graphics(img_path)
(control: 10, impasse: 19) The second most common response was the correct (triangular) response “F”. In the impasse condition, there are no orthogonal-distractors. In the control condition, the A datapoint acts as an orthogonal distractor.
#F: timecourse for subjects giving the triangular-consistent answer "F" on Q1
subjects <- df_items %>%
  filter(q == 1, answer == "F") %>%
  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject),
       aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse) +
  geom_raster() +
  scale_fill_manual(values = c(lgrey, lyellow, lblue, lgreen))
#CODE ERROR TYPES from manual inference ("F" gets an empty error code in both conditions)
is_f <- df_items$q == 1 & df_items$answer == "F"
df_items$error[is_f & df_items$condition == 111] <- ""
df_items$error[is_f & df_items$condition == 121] <- ""
#DISPLAY the figure for Q1 blank responses (include_graphics embeds the image file)
img_path <- "images/q1_blank.png"
include_graphics(img_path)
(control: 0, impasse: 18) The third most common response was (blank) no response, though this only occurred in the impasse condition; likely when subjects could not locate an orthogonal distractor, and chose not to revert to an alternative strategy.
#BLANK: timecourse for subjects who left Q1 unanswered
subjects <- df_items %>% filter(q==1 & answer == "") %>% select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) +
geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)
#CODE ERROR TYPES from manual inference
#BUG FIX: this previously matched answer == "AF" (copy/paste slip), so blank
#responses were never coded and the AF subject's error code was overwritten
df_items[(df_items$q==1) & (df_items$answer == ""),]$error <- "fixed-no-response" #graphical fixedness, no response
#DISPLAY the figure for Q1 answer "O" (include_graphics embeds the image file)
img_path <- "images/q1_O.png"
include_graphics(img_path)
(control: 0, impasse: 14) The fourth most common response was an O, though this only occurred in the impasse condition. Similar to the 1 person who entered an ‘A’, the ‘O’ response indicates either a visual-alignment error (thinking the O is an orthogonal intersector) OR a satisficing strategy to select the closest non-orthogonal data point. However, A is both closer (in y-distance height) as well as (x-distance). A has an orthogonal intersection of +0:30m, while O has an orthogonal intersection of -0:30m. However, because O has a greater duration (higher in the graph) the visual angle between 11am and O is less than that of A, making it more likely to be confused as a true orthogonal intersection. In this sense, it is sensible that event O has t[4,2] responses and A only t[1,2] responses.
TODO: Can mousepath be used to differentiate between a visual error (thinking O was orthogonal) and a satisficing decision (closest point to the orthogonal line)? If the subject traces up from the start time, A is the first data point [impasse condition] they would come across. My intuition is that folks that trace the orthogonal would select A, and folks that don’t (i.e. are just inspecting the graph and not using the mouse) would be more likely to select O.
#O: timecourse for subjects who answered "O" on Q1
subjects <- df_items %>% filter(q==1 & answer == "O") %>% select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) +
geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)
#CODE ERROR TYPES from manual inference
#BUG FIX: this previously matched answer == "AF" (copy/paste slip); per the note
#above, O sits at a -0:30m orthogonal offset, hence "visualerror-left-30"
df_items[(df_items$q==1) & (df_items$answer == "O"),]$error <- "visualerror-left-30"
#DISPLAY the figure for Q1 answer "CF" (include_graphics embeds the image file)
img_path <- "images/q1_CF.png"
include_graphics(img_path)
(control: 1, impasse: 6) The fifth most common response was the two-item entry C and F, both of which are diagonal intersects with the start time. However, this response violates the encoding convention that a given referent (i.e start time) has a single spatial encoding in the representation. This suggests a partial understanding of something diagonal about the coordinate system; the reader is attending to the gridlines and using them to intersect data and the x-axis. However, they’ve not decoded the spatial referent of start time vs. end time.
TODO: I would suspect this acts as a transition strategy, or an indication of partial understanding that may eventually lead to full understanding, and unlikely to revert to orthogonal. MODEL THIS.
#CF: timecourse for subjects who answered "CF" on Q1
subjects <- df_items %>% filter(q==1 & answer == "CF") %>% select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) +
geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)
#CODE ERROR TYPES from manual inference
#BUG FIX: this previously matched answer == "AF" (copy/paste slip), so the CF
#responses were never coded as "lines-connect"
df_items[(df_items$q==1) & (df_items$answer == "CF"),]$error <- "lines-connect"
#DISPLAY the figure for Q1 answer "AI" (include_graphics embeds the image file)
img_path <- "images/q1_AI.png"
include_graphics(img_path)
(control: 0, impasse: 3) The sixth most common response was the two item: A,I, with only having responses in the impasse condition. Both A and I are near-orthogonal distracts of 11AM, with +0:30m offset, suggesting this is either a satisficing or visual error case. We do not see this response in the control condition, most likely because there is a true orthogonal intersect.
#AI: timecourse for subjects who selected both A and I on Q1
subjects <- df_items %>%
  filter(q == 1, answer == "AI") %>%
  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject),
       aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse) +
  geom_raster() +
  scale_fill_manual(values = c(lgrey, lyellow, lblue, lgreen))
#CODE ERROR TYPES from manual inference
ai_rows <- df_items$q == 1 & df_items$answer == "AI"
df_items$error[ai_rows] <- "visualerror-right-2"
#DISPLAY the figure for Q1 answer "AF" (include_graphics embeds the image file)
img_path <- "images/q1_AF.png"
include_graphics(img_path)
(control: 1, impasse: 0) One subject in the control condition selected both the triangular and orthogonal-consistent responses. Considering this was only a single subject, and the multiple choice checkboxes were directly underneath each other in the response list, it is plausible that this was a clerical error, whereby the subject selected one response, and then the other, without unchecking the other box.
TODO: Can we verify this from the mouseclick stream data?
#AF: timecourse for the subject who selected both A and F on Q1
subjects <- df_items %>%
  filter(q == 1, answer == "AF") %>%
  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject),
       aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse) +
  geom_raster() +
  scale_fill_manual(values = c(lyellow, lgreen, lblue, lgrey))
#CODE ERROR TYPES from manual inference (suspected checkbox slip -- see note above)
af_rows <- df_items$q == 1 & df_items$answer == "AF"
df_items$error[af_rows] <- "clerical"
#DISPLAY the figure for Q1 answer "AO" (include_graphics embeds the image file)
img_path <- "images/q1_AO.png"
include_graphics(img_path)
(control: 0, impasse: 1) One subject in the impasse condition selected both events A and O, each near-orthogonal distractors, but in opposite directions. This sits as a hybrid between ‘lines connect’ in that they are both off the orthogonal intersection, and visual error. However, since the options are both off the orthogonal line, it seems unlikely the individual saw them both as accurate orthogonal intersects.
TODO: What is happening here? Can we recreate from the mouseclick data? Another clerical error? or something more interesting?
#AO: timecourse for the subject who selected both A and O on Q1
subjects <- df_items %>%
  filter(q == 1, answer == "AO") %>%
  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject),
       aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse) +
  geom_raster() +
  scale_fill_manual(values = c(lyellow, lblue, lgreen, lgrey))
#CODE ERROR TYPES from manual inference
ao_rows <- df_items$q == 1 & df_items$answer == "AO"
df_items$error[ao_rows] <- "visualerror-right-left"
TODO: How did this subject produce an (accidental?) triangular answer on Q5?
#DISPLAY the figure for Q1 answer "C" (include_graphics embeds the image file)
img_path <- "images/q1_C.png"
include_graphics(img_path)
(control: 0, impasse: 1) One subject in the impasse condition selected event C, which is a diagonal intersect with the given start time, but ends, rather than starts, at 11.
I’m surprised to not see more of these type of responses, actually!
#C: timecourse for the subject who answered "C" on Q1
subjects <- df_items %>% filter(q==1 & answer == "C") %>% select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) +
geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)
#CODE ERROR TYPES from manual inference
#BUG FIX: this previously matched answer == "AO" (copy/paste slip), which
#clobbered the AO coding and left the "C" response uncoded
df_items[(df_items$q==1) & (df_items$answer == "C"),]$error <- "backwards"
TODO: How did this subject produce (accidental?) triangular answers on Q4, Q10, and Q14?
#DISPLAY the figure for Q1 answer "I" (include_graphics embeds the image file)
img_path <- "images/q1_I.png"
include_graphics(img_path)
(control: 0, impasse: 1) One subject in the impasse condition selected event I, which is a near-orthogonal distractor. What is interesting about this answer, is that the individual does NOT also select A, which falls along the same orthogonal intersect. This might suggest that a visual error judging I as orthogonal to 11 (but not A) because the it is so much higher in the graph.
I’m surprised to not see more of these type of responses, actually!
#I: timecourse for the subject who answered "I" on Q1
#BUG FIX: this chunk was labelled "#O" and filtered/coded answer == "O"
#(copy/paste from the O section above), so it clobbered the O error coding and
#never coded the "I" response described in the accompanying prose
subjects <- df_items %>% filter(q==1 & answer == "I") %>% select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) +
geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)
#CODE ERROR TYPES from manual inference (near-orthogonal distractor; see note above)
df_items[(df_items$q==1) & (df_items$answer == "I"),]$error <- "visualerror-right"
What is the distribution of response times per question?
#SUMMARIZE response time
#favstats() (mosaic package) tabulates min/Q1/median/Q3/max/mean/sd/n/missing
time.stats <- favstats(~time_sec, data = df_items)
time.stats
## min Q1 median Q3 max mean sd n missing
## 1.264 13.80425 24.8685 46.07975 336.033 35.46849 33.11745 1890 0
Response time per question (n=1890) ranged from 1.2s to 336s (5.5 minutes), with a M=35.5s, SD = 33.12s.
#VISUALIZE distribution of response times per question
#density histogram with the sample mean (blue vline) and a fitted curve overlaid
gf_dhistogram(~time_sec, binwidth = 5, data = df_items) %>%
gf_vline(xintercept = ~time.stats$mean, color = "blue") %>%
gf_fitdistr(color="blue") %>%
gf_labs(title ="Distribution of response latency (per item)")
#VERIFY normality of resulting data with qqPlot
qqPlot(~time_sec, data = df_items)
However, the distribution is clearly not normal, so it is appropriate to transform the response latency variable.
#APPLY a log transform
#raw latencies are right-skewed (see above), so analyze log(seconds)
df_items$log_time <- log(df_items$time_sec)
log_time.stats <- favstats(~log_time, data = df_items)
log_time.stats
#VISUALIZE the log-transformed distribution with mean line and fitted curve
#(title fixed: this plot covers ALL items, not just the first question)
gf_dhistogram(~log_time, data = df_items) %>%
gf_vline(xintercept = ~log_time.stats$mean, color = "blue") %>%
gf_fitdistr(color="blue") %>%
gf_labs(title ="Distribution of response latency (LOGT, per item)", x="Log-transform (seconds)")
#VERIFY normality of resulting data with qqPlot
qqPlot(~log_time, data = df_items)
TODO: Is this an appropriate outcome? What about the hump at the start of the plot?.
What is the distribution of response times on the very first question?
#FILTER to the first question only and summarize its response times
df_q1 <- df_items %>% filter (q==1)
q1.stats <- favstats(~time_sec, data = df_q1)
q1.stats
## min Q1 median Q3 max mean sd n missing
## 7.223 26.61575 39.315 52.17 161.362 44.52736 26.22314 126 0
Response time (in seconds) on the first question (n=126) ranged from 7.2 to 161 seconds, with a M = 44.5, SD = 26.2. The distribution is clearly not-normal.
#VISUALIZE Q1 response-time distribution (histogram + mean vline + fitted curve)
gf_dhistogram(~ time_sec, data = df_q1) %>%
gf_vline(xintercept = ~q1.stats$mean, color = "blue") %>%
gf_fitdistr(color="blue") %>% # gf_dist("norm", color="blue", params=list(q1.stats$mean, q1.stats$sd), xlim(0,50)) %>%
gf_labs(title ="Distribution of response latency on first question")
#VERIFY normality of resulting data with qqPlot
qqPlot(~time_sec, data = df_q1)
Do response times differ by condition? TODO: EXPLORE how to model reaction time. https://lindeloev.github.io/shiny-rt/
#SUMMARIZE Q1 response time split by condition (111 = control, 121 = impasse)
time.stats <- favstats(time_sec ~ condition, data = df_q1)
time.stats
## condition min Q1 median Q3 max mean sd n missing
## 1 111 11.080 22.94175 35.1385 43.280 112.983 37.19852 20.51576 62 0
## 2 121 7.223 32.42225 46.4050 63.909 161.362 51.62717 29.20256 64 0
#VISUALIZE Q1 response times faceted by condition, with per-condition mean lines
gf_dhistogram(~time_sec, fill = ~condition, data = df_q1) %>%
gf_facet_grid(condition~.) %>%
gf_vline(xintercept = ~mean, color = "red", data = time.stats)
NOTE(review): this sentence reports block-level latency totals (control M = 9.54s, SD = 3.09s; impasse-scaffold M = 8.90s, SD = 2.69s), but the Q1 per-question statistics above show the impasse condition as HIGHER (control M = 37.2s vs. impasse M = 51.6s) — confirm which analysis this sentence describes.
#SUMMARIZE log-transformed Q1 response time split by condition
time.stats <- favstats(log_time ~ condition, data = df_q1)
time.stats
## condition min Q1 median Q3 max mean sd n
## 1 111 2.405142 3.132936 3.559073 3.767658 4.727237 3.483336 0.5181027 62
## 2 121 1.977270 3.477854 3.837388 4.157262 5.083650 3.797496 0.5625236 64
## missing
## 1 0
## 2 0
#VISUALIZE log-scale Q1 response times by condition; qqPlot re-checks normality
gf_dhistogram(~log_time, fill = ~condition, data = df_q1) %>%
gf_facet_grid(condition~.) %>%
gf_vline(xintercept = ~mean, color = "red", data = time.stats)
qqPlot(~log_time, data = df_q1)
## [1] 65 113
#LINEAR MODEL with raw dependent variable
#OLS: Q1 response time (seconds) ~ condition; the baseline level is condition 111
m1 <- lm(time_sec ~ condition, data = df_q1)
summary(m1)
##
## Call:
## lm(formula = time_sec ~ condition, data = df_q1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.404 -17.456 -4.720 9.201 109.735
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.199 3.214 11.57 < 2e-16 ***
## condition121 14.429 4.509 3.20 0.00175 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.3 on 124 degrees of freedom
## Multiple R-squared: 0.07627, Adjusted R-squared: 0.06882
## F-statistic: 10.24 on 1 and 124 DF, p-value: 0.001746
#LINEAR MODEL with log-transformed dependent variable
#same model on log(seconds), which addresses the skew noted above
mT <- lm(log_time ~ condition, data = df_q1)
summary(mT)
##
## Call:
## lm(formula = log_time ~ condition, data = df_q1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8202 -0.3504 0.0503 0.3382 1.2861
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.48334 0.06872 50.686 < 2e-16 ***
## condition121 0.31416 0.09643 3.258 0.00145 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5411 on 124 degrees of freedom
## Multiple R-squared: 0.07885, Adjusted R-squared: 0.07142
## F-statistic: 10.61 on 1 and 124 DF, p-value: 0.001448
The difference in average response time (for the first question) IS statistically significant F(1,124) = 10.61 , p < 0.05, with a linear model predicting response time from condition explaining around 8% of variance. (The log-transform model reduces residual standard error from 25 to 0.5.)
TODO consider item type
First, verify (sanity check!) that the flatten.js data-wrangling scripts were correct by generating participant totals directly from the item-level data and comparing them with the participant-level file. Comparison of the summarized item-level data with the participant-level file shows that the question-accuracy totals for each participant are the same.
#SUMMARIZE FROM ITEMS FILES
#rebuild per-subject accuracy totals directly from the item-level rows
df_item_sanity <- df_items %>%
  filter(q != 16) %>%
  group_by(subject) %>%
  summarise(tri_correct = sum(rs_tri),
            orth_correct = sum(rs_ortho),
            total = tri_correct + orth_correct) %>%
  mutate(subject = factor(subject)) %>%
  arrange(desc(subject))
#SUMMARIZE FROM PARTICIPANT FILES
#load the participant-level scores and shape them to match df_item_sanity
fall_participants <- "data/fall_sgc3a_participants.csv"
spring_participants <- "data/spring_sgc3a_participants.csv"
df_fall_p <- read.csv(fall_participants)
df_spring_p <- read.csv(spring_participants)
df_participants_sanity <- rbind(df_fall_p, df_spring_p) %>%
  transmute(subject = factor(subject),
            tri_correct = triangular_score,
            orth_correct = orthogonal_score,
            total = tri_correct + orth_correct) %>%
  arrange(desc(subject))
#CHECK EQUALITY between the item-derived and participant-file totals
#NOTE(review): dplyr::all_equal() is deprecated in recent dplyr -- consider
#isTRUE(all.equal(...)) or waldo::compare() on the next revision
all_equal(df_participants_sanity,df_item_sanity)
## [1] TRUE
#REMOVE TEMPORARY DFS
rm(fall_participants, spring_participants, df_participants_sanity, df_fall_p, df_spring_p)
df_items_by_participant <- df_item_sanity
#df_fall and df_spring were already removed earlier in the notebook; listing
#them again here produced "object not found" warnings, so only df_item_sanity
#is removed now
rm(df_item_sanity)
How many subjects were run in each data collection session?
#MANUALLY INSPECT SESSIONS
#count the distinct subjects run in each data-collection session
df_items %>%
  group_by(session) %>%
  summarize(n = n_distinct(subject))
## # A tibble: 22 × 2
## session n
## <fct> <int>
## 1 alpha 4
## 2 bravo 7
## 3 charlie 15
## 4 delta 2
## 5 echo 9
## 6 fire 5
## 7 flower 1
## 8 foxtrot 3
## 9 golf 10
## 10 heaven 3
## # … with 12 more rows